test of some sort

library(reticulate)
library(tidyverse)
library(lubridate)
import pandas as pd
import pprint
import requests


def collapse_results_by_party(results_by_candidate, candidates):
    """Sum per-candidate values into per-party totals.

    results_by_candidate maps a candidate key to a number; candidates maps
    the same keys to dicts that carry a 'party' entry. Returns a new dict
    mapping each party label to the sum of its candidates' values.
    """
    party_totals = {}
    for candidate_key in results_by_candidate:
        party_label = candidates[candidate_key]['party']
        # Start every party at 0 so the first addition matches later ones.
        if party_label not in party_totals:
            party_totals[party_label] = 0
        party_totals[party_label] += results_by_candidate[candidate_key]

    return party_totals

# The 50 states whose results are downloaded. The order follows the postal
# abbreviations (AK, AL, AR, AZ, ...) and must not change: the R code further
# down picks states out of this list by position.
states = [
    'Alaska', 'Alabama', 'Arkansas', 'Arizona', 'California',
    'Colorado', 'Connecticut', 'Delaware', 'Florida', 'Georgia',
    'Hawaii', 'Iowa', 'Idaho', 'Illinois', 'Indiana',
    'Kansas', 'Kentucky', 'Louisiana', 'Massachusetts', 'Maryland',
    'Maine', 'Michigan', 'Minnesota', 'Missouri', 'Mississippi',
    'Montana', 'North Carolina', 'North Dakota', 'Nebraska', 'New Hampshire',
    'New Jersey', 'New Mexico', 'Nevada', 'New York', 'Ohio',
    'Oklahoma', 'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina',
    'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Virginia',
    'Vermont', 'Washington', 'Wisconsin', 'West Virginia', 'Wyoming',
]

# Download the presidential race-page JSON for every state and keep the
# parsed payloads in a dict keyed by the URL-style state name
# (lower case, spaces replaced with hyphens).
all_results = {}
for state in states:
    print('Downloading {}'.format(state))
    formatted_state = state.lower().replace(' ', '-')
    url = 'https://static01.nyt.com/elections-assets/2020/data/api/2020-11-03/race-page/{}/president.json'.format(formatted_state)
    # Without a timeout a stalled connection would block the run forever.
    response = requests.get(url, timeout=30)
    # Fail on HTTP errors here rather than trying to parse an error page as JSON.
    response.raise_for_status()
    all_results[formatted_state] = response.json()
# Flatten every state's time series into one list of row dicts and turn the
# list into a single DataFrame (one row per state per time-series entry).
records = []
for state, state_results in all_results.items():
    # Only the first entry of the 'races' list is used.
    race = state_results['data']['races'][0]

    # Tag each candidate with a short party label: 'rep', 'dem', or 'trd'
    # for every other party_id.
    for candidate in race['candidates']:
        if candidate['party_id'] == 'republican':
            candidate['party'] = 'rep'
        elif candidate['party_id'] == 'democrat':
            candidate['party'] = 'dem'
        else:
            candidate['party'] = 'trd'
    # Lookup table from candidate key to the (now party-tagged) candidate dict.
    candidates = { candidate['candidate_key']: candidate for candidate in race['candidates'] }

    # Each time-series entry is modified in place and reused as the output row.
    for data_point in race['timeseries']:
        # Copy the race-level fields onto every row.
        data_point['state']             = state
        data_point['expected_votes']    = race['tot_exp_vote']
        data_point['trump2016']         = race['trump2016']
        data_point['votes2012']         = race['votes2012']
        data_point['votes2016']         = race['votes2016']

        # Replace the per-candidate share mapping with three per-party
        # columns; a party with no candidates in this entry gets 0.
        vote_shares = collapse_results_by_party(data_point['vote_shares'], candidates)
        for party in ['rep', 'dem', 'trd']:
            data_point['vote_share_{}'.format(party)] = vote_shares.get(party, 0)

        # Drop the nested mapping so the row holds no 'vote_shares' key.
        data_point.pop('vote_shares')
        records.append(data_point)
# NOTE(review): the column order of the frame presumably follows the key
# order of these dicts, and the R code later selects columns by position --
# confirm before adding, removing or reordering keys above.
time_series_df = pd.DataFrame.from_records(records)
#time_series_df.to_csv('data/nyt_ts.csv', encoding='utf-8')
# Bring the Python objects into the R session through reticulate.
votes <- py$time_series_df

# Rebuild the state keys the same way the Python side does (lower case,
# every space replaced by a hyphen). str_replace() would only replace the
# first space of each name; str_replace_all() matches Python's str.replace().
states <- py$states %>%
  str_to_lower() %>%
  str_replace_all(" ", "-")



# Derive absolute vote counts from the shares, the change in those counts
# between consecutive updates, and a parsed date-time column.
data <- votes %>%
  mutate(
    trump_nominal = votes * vote_share_rep,
    biden_nominal = votes * vote_share_dem,
    third_nominal = votes * vote_share_trd
  ) %>%
  # lag() must restart for every state; without the grouping the first
  # update of a state is differenced against the last update of whichever
  # state precedes it in the table.
  group_by(state) %>%
  mutate(
    trump_change = trump_nominal - lag(trump_nominal),
    biden_change = biden_nominal - lag(biden_nominal)
  ) %>%
  ungroup() %>%
  mutate(time = as_datetime(timestamp))





# Reshape the share / vote / change columns into long format: one row per
# update and measure, with a readable label in `party` and the number in
# `vote_share`.
#
# Plain assignment is used instead of `%<>%`: that operator is provided by
# magrittr, which library(tidyverse) does not attach. The columns are
# selected by name rather than by position so the reshape does not depend on
# the column order of the incoming table.
data <- data %>%
  pivot_longer(
    cols = c(
      vote_share_rep, vote_share_dem, vote_share_trd,
      trump_nominal, biden_nominal, third_nominal,
      trump_change, biden_change
    ),
    names_to = "party",
    values_to = "vote_share"
  ) %>%
  mutate(
    party = case_when(
      party == "vote_share_rep" ~ "Republican share",
      party == "vote_share_dem" ~ "Democrat share",
      party == "vote_share_trd" ~ "Third party share",
      party == "trump_nominal" ~ "Republican votes",
      party == "biden_nominal" ~ "Democrat votes",
      party == "third_nominal" ~ "Third party votes",
      party == "trump_change" ~ "Republican change",
      party == "biden_change" ~ "Democrat change",
      TRUE ~ "Other"
    )
  )

# Use the black-and-white ggplot2 theme for every plot below.
theme_set(theme_bw())


# State keys are lower case with hyphens instead of spaces, so North Carolina
# has to be written "north-carolina" to match the `state` column.
crit_states <- c(
  "wisconsin", "nevada", "georgia", "michigan", "minnesota",
  "pennsylvania", "arizona", "north-carolina"
)
# Plot the Republican and Democrat vote share of one state across updates.
#
# x: state key as stored in the `state` column of the global `data` table
#    (lower case, hyphenated).
# Returns a ggplot object with one line per party, a dashed horizontal line at
# the mean of trump2016 / votes2016 for the state, and a text label for it.
# Stops with an error when no non-zero share rows exist for `x`.
by_state_voteshare <- function(x) {
  # A separate local name keeps the global `data` table from being shadowed.
  state_data <- data %>%
    filter(
      state == x,
      party %in% c("Republican share", "Democrat share"),
      vote_share != 0
    ) %>%
    # `step` is the running row index of the filtered rows.
    mutate(timestamp = row_number()) %>%
    rename("step" = "timestamp")

  if (nrow(state_data) == 0) {
    stop("No vote share data found for state '", x, "'", call. = FALSE)
  }

  # Values reused by the reference line and its label.
  trump_share_2016 <- mean(state_data$trump2016 / state_data$votes2016)
  share_range <- max(state_data$vote_share) - min(state_data$vote_share)
  max_step <- max(state_data$step)

  state_data %>%
    ggplot(aes(step, vote_share, colour = party)) +
    geom_line() +
    labs(title = str_to_title(x), y = "Vote share") +
    scale_y_continuous(labels = scales::percent_format()) +
    # Named values tie each colour to its party instead of to level order.
    scale_colour_manual(
      values = c("Democrat share" = "blue", "Republican share" = "red")
    ) +
    geom_hline(yintercept = trump_share_2016, lty = 2) +
    # Label sits towards the right edge, slightly below the reference line.
    annotate(
      "text",
      label = paste0("Trump 2016 ", round(trump_share_2016 * 100, 2), "%"),
      x = max_step - max_step / 8,
      y = trump_share_2016 - 0.05 * share_range
    )
}



# Plot the Republican and Democrat vote counts of one state across updates.
#
# x: state key as stored in the `state` column of the global `data` table
#    (lower case, hyphenated).
# Returns a ggplot object with one line per party, a dashed horizontal line at
# the mean of trump2016 for the state, and a single text label next to it.
# Stops with an error when no non-zero rows exist for `x`.
by_state_nominal <- function(x) {
  # A separate local name keeps the global `data` table from being shadowed.
  state_data <- data %>%
    filter(
      state == x,
      party %in% c("Republican votes", "Democrat votes"),
      vote_share != 0
    ) %>%
    # `step` is the running row index of the filtered rows.
    mutate(timestamp = row_number()) %>%
    rename("step" = "timestamp")

  if (nrow(state_data) == 0) {
    stop("No vote count data found for state '", x, "'", call. = FALSE)
  }

  trump_votes_2016 <- mean(state_data$trump2016)
  # After the reshape the `vote_share` column holds vote counts for these rows.
  votes_range <- max(state_data$vote_share) - min(state_data$vote_share)
  max_step <- max(state_data$step)

  state_data %>%
    ggplot(aes(step, vote_share, colour = party)) +
    geom_line() +
    # The y axis shows vote counts here, not shares.
    labs(title = str_to_title(x), y = "Votes") +
    scale_y_continuous(labels = scales::number_format()) +
    # Named values tie each colour to its party instead of to level order.
    scale_colour_manual(
      values = c("Democrat votes" = "blue", "Republican votes" = "red")
    ) +
    geom_hline(yintercept = trump_votes_2016, lty = 2) +
    # A scalar y draws the label once; passing the whole trump2016 column
    # would draw one overlapping copy of the text per row.
    annotate(
      "text",
      label = "Trump 2016 ",
      x = max_step - max_step / 8,
      y = trump_votes_2016 - 0.05 * votes_range
    )
}

library(plotly)
# Build the update-to-update vote change plot for one state.
#
# state_key: state key as stored in the `state` column of the global `data`
#            table (lower case, hyphenated).
# Returns a ggplot object with one line per party; rows with a missing change
# value or a zero running vote total are left out.
plot_state_change <- function(state_key) {
  data %>%
    filter(
      state == state_key,
      party %in% c("Republican change", "Democrat change"),
      !is.na(vote_share),
      votes != 0
    ) %>%
    # `step` is the running row index of the filtered rows.
    mutate(timestamp = row_number()) %>%
    rename("step" = "timestamp") %>%
    ggplot(aes(step, vote_share, colour = party)) +
    geom_line() +
    # Named values tie each colour to its party instead of to level order.
    scale_colour_manual(
      values = c("Democrat change" = "blue", "Republican change" = "red")
    ) +
    scale_y_continuous(labels = scales::number_format())
}

ga <- plot_state_change("georgia")

ggplotly(ga)

tx <- plot_state_change("texas")
fl <- plot_state_change("florida")
oh <- plot_state_change("ohio")
wi <- plot_state_change("wisconsin")
pa <- plot_state_change("pennsylvania")
mi <- plot_state_change("michigan")

ggplotly(tx)
ggplotly(oh)
ggplotly(fl)
ggplotly(mi)
ggplotly(wi)
ggplotly(pa)
# Plot the Republican and Democrat vote share of one state across updates.
#
# This assignment comes after an earlier definition of the same name in this
# file, so it is the one the calls below use.
#
# x: state key as stored in the `state` column of the global `data` table
#    (lower case, hyphenated).
# Returns a ggplot object with one line per party, a dashed horizontal line at
# the mean of trump2016 / votes2016 for the state, and a text label for it.
# Stops with an error when no non-zero share rows exist for `x`.
by_state_voteshare <- function(x) {
  # A separate local name keeps the global `data` table from being shadowed.
  state_data <- data %>%
    filter(
      state == x,
      party %in% c("Republican share", "Democrat share"),
      vote_share != 0
    ) %>%
    # `step` is the running row index of the filtered rows.
    mutate(timestamp = row_number()) %>%
    rename("step" = "timestamp")

  if (nrow(state_data) == 0) {
    stop("No vote share data found for state '", x, "'", call. = FALSE)
  }

  # Values reused by the reference line and its label.
  trump_share_2016 <- mean(state_data$trump2016 / state_data$votes2016)
  share_range <- max(state_data$vote_share) - min(state_data$vote_share)
  max_step <- max(state_data$step)

  state_data %>%
    ggplot(aes(step, vote_share, colour = party)) +
    geom_line() +
    labs(title = str_to_title(x), y = "Vote share") +
    scale_y_continuous(labels = scales::percent_format()) +
    # Named values tie each colour to its party instead of to level order.
    scale_colour_manual(
      values = c("Democrat share" = "blue", "Republican share" = "red")
    ) +
    geom_hline(yintercept = trump_share_2016, lty = 2) +
    # Label sits towards the right edge, slightly below the reference line.
    annotate(
      "text",
      label = paste0("Trump 2016 ", round(trump_share_2016 * 100, 2), "%"),
      x = max_step - max_step / 8,
      y = trump_share_2016 - 0.05 * share_range
    )
}

# Plot two series of one state against wall-clock time, up to a cut-off.
#
# state_key: state key as stored in the `state` column of `data`.
# series:    the two `party` labels to draw.
# cutoff:    POSIXct instant; only rows with `time` strictly before it are kept.
# Returns a ggplot object with one line per series.
plot_series_until <- function(state_key, series, cutoff) {
  data %>%
    filter(
      state == state_key,
      party %in% series,
      vote_share != 0,
      time < cutoff
    ) %>%
    ggplot(aes(time, vote_share, colour = party)) +
    geom_line() +
    scale_colour_manual(values = c("blue", "red"))
}

share_series <- c("Republican share", "Democrat share")
votes_series <- c("Republican votes", "Democrat votes")

# `time` is a date-time (built with as_datetime(), which defaults to UTC).
# ymd() without `tz` returns a Date, and comparing a date-time with a Date is
# not reliable in R, so the cut-offs are created as UTC date-times instead.
wisconsin_cutoff <- ymd(20201105, tz = "UTC")
georgia_cutoff <- ymd(20201107, tz = "UTC")

plot_series_until("wisconsin", share_series, wisconsin_cutoff)

plot_series_until("georgia", share_series, georgia_cutoff)

plot_series_until("wisconsin", votes_series, wisconsin_cutoff)

plot_series_until("georgia", votes_series, georgia_cutoff)

# The five states with the highest Republican share in their last update.
# The share rows are labelled "Republican share"; filtering on "Republican"
# matches no label at all and returns an empty table.
data %>%
  filter(party == "Republican share") %>%
  group_by(state) %>%
  # Keep only the last row of each state.
  slice(n()) %>%
  # Drop the grouping so the ranking runs over all states together.
  ungroup() %>%
  arrange(vote_share) %>%
  tail(5) %>%
  select(state, vote_share) %>%
  rename("republican_share" = "vote_share")
## # A tibble: 0 x 2
## # Groups:   state [0]
## # ... with 2 variables: state <chr>, republican_share <dbl>

Safe Democrat states

# The five states with the lowest Republican share in their last update.
# The share rows are labelled "Republican share"; filtering on "Republican"
# matches no label at all and returns an empty table.
data %>%
  filter(party == "Republican share") %>%
  group_by(state) %>%
  # Keep only the last row of each state.
  slice(n()) %>%
  # Drop the grouping so the ranking runs over all states together.
  ungroup() %>%
  arrange(vote_share) %>%
  head(5) %>%
  select(state, vote_share) %>%
  rename("republican_share" = "vote_share")
## # A tibble: 0 x 2
## # Groups:   state [0]
## # ... with 2 variables: state <chr>, republican_share <dbl>
# Vote share plots, drawn in this order. With the ordering of the Python-side
# `states` list the positions are: 46 vermont, 19 massachusetts, 5 california,
# 20 maryland, 11 hawaii, 39 rhode-island, 47 washington, 8 delaware,
# 37 oregon, 34 new-york.
walk(
  states[c(46, 19, 5, 20, 11, 39, 47, 8, 37, 34)],
  function(state_key) print(by_state_voteshare(state_key))
)

Bellwether reference states

# With the ordering of the Python-side `states` list the positions are:
# 43 texas, 9 florida, 35 ohio.
bellwether_keys <- states[c(43, 9, 35)]

# Vote share plots first, then the vote count plots, in the same state order.
walk(bellwether_keys, function(state_key) print(by_state_voteshare(state_key)))

walk(bellwether_keys, function(state_key) print(by_state_nominal(state_key)))

States with voting irregularities

# With the ordering of the Python-side `states` list the positions are:
# 22 michigan, 48 wisconsin, 10 georgia, 38 pennsylvania, 4 arizona, 33 nevada.
irregular_keys <- states[c(22, 48, 10, 38, 4, 33)]

# Vote share plots first, then the vote count plots, in the same state order.
walk(irregular_keys, function(state_key) print(by_state_voteshare(state_key)))

walk(irregular_keys, function(state_key) print(by_state_nominal(state_key)))

Other states

# Position 45 is "virginia" with the ordering of the Python-side `states` list.
by_state_voteshare(states[[45]])